library(data.table)
library(glue)
library(readr)


# 1. Open data (CRA, PSD) and match ----

# CRA balance records (provides `balance`, `cra_id` and `date`, used below).
# NOTE: the file name is appended to DATADIR verbatim -- no path separator
# is inserted, so DATADIR must already end with one.
cato <- readRDS(paste0(DATADIR, "htb_cra-cato-matched.rds"))

# Table joined to the CRA data on `cra_id` below; it supplies
# `origination_date` and `gov`.
di <- readRDS(paste0(DATADIR, "htb_cus_id_matched_cra.rds"))

# `cato` is at consumer-product level (per the original author's note), so
# first add `tot_balance`: the sum of `balance` (NAs ignored) over all rows
# sharing the same `cra_id` and `date`. The column is added to `cato` by
# reference.
cato[, tot_balance := sum(balance, na.rm = TRUE),
     by = .(cra_id, date)]

# Then collapse to one row per (cra_id, date) -- presumably one row per
# household and month; confirm that `cra_id` identifies a household.
ccato <- unique(cato[, .(tot_balance, cra_id, date)])

# Match with PSD: for every row of `di`, attach all `ccato` rows with the
# same `cra_id`. Rows of `di` without a match are kept with NA balances
# (data.table's default `nomatch`). `allow.cartesian = TRUE` lets the result
# exceed the size of the inputs, which happens when a `cra_id` has several
# dates.
dm <- ccato[di, on = "cra_id", allow.cartesian = TRUE]



# 2. Create sample for stats ----

# Snap each origination date to the first day of its month so it is
# comparable with `date` (the original note says cato dates are month
# starts; that is not checked here).
dm[, datem := lubridate::floor_date(origination_date, "month")]

# Gap between the balance date and the origination month.
# NOTE(review): this is a number of days when both columns are of class
# Date -- confirm the column classes.
dm[, mdiff := as.numeric(date - datem)]

# Keep gaps of 0 to 100 whole days, i.e. the origination month plus the
# three following monthly snapshots when dates are month starts.
dm <- dm[mdiff %in% 0:100]

# Months elapsed since the origination month, taken from the calendar
# year/month instead of floor(mdiff / 30): dividing days by 30 puts a
# 28-day gap (Feb -> Mar) in month 0 and a 59-day gap (Jan -> Mar) in
# month 1.
dm[, dmon := 12L * (data.table::year(date) - data.table::year(datem)) +
     (data.table::month(date) - data.table::month(datem))]

# Sanity check: each day gap should map to exactly one month bucket.
dm[, table(mdiff, dmon)]

# Main split: TRUE when `gov` is exactly the "Yes" label below, FALSE for
# any other label (NA where `gov` is missing).
dm[, `:=`(gov_support = gov == "Government supported initiative - Yes")]

# Keep strictly positive total balances only; zero, negative and NA
# balances are all dropped by this filter.
dm0 <- dm[tot_balance > 0, ]



# 3. Stats ----

# 10th, 50th and 90th percentile of `tot_balance`, plus the row count `N`,
# for each value of `dmon` in `dt`. Groups appear in order of first
# occurrence in `dt`.
balance_quantiles <- function(dt) {
  dt[, .(
    pct10 = quantile(tot_balance, 0.10),
    pct50 = quantile(tot_balance, 0.50),
    pct90 = quantile(tot_balance, 0.90),
    .N
  ), by = dmon]
}

# Rows with NA `gov_support` fall into neither table, because data.table
# treats NA in the row filter as FALSE.
g0 <- balance_quantiles(dm0[gov_support == FALSE])  # not government supported
g1 <- balance_quantiles(dm0[gov_support == TRUE])   # government supported

# LaTeX tables with values rounded to whole numbers.
xtable::xtable(g0, digits = 0)
xtable::xtable(g1, digits = 0)
